source("pscore_estimation_core1.R")
library(Matching)

load(file="RData.simdata.G.obs1000.reps1000")
dta <- simdata.G.obs1000.reps1000
rm(simdata.G.obs1000.reps1000)
gc()

#true estimate
g1 <- -0.4

sims <- dim(dta)[2]
nobs <- length(dta[1,2]$w1)

true.est <- matrix(0, nrow=sims, ncol=1)

logit.pscore <- list()
logit.est <- matrix(0, nrow=sims, ncol=1)
logit.nSE <- matrix(0, nrow=sims, ncol=1)
logit.aiSE <- matrix(0, nrow=sims, ncol=1)

boosting.pscore <- list()
boosting.desc <- list()
boosting.est <- matrix(0, nrow=sims, ncol=1)
boosting.nSE <- matrix(0, nrow=sims, ncol=1)
boosting.aiSE <- matrix(0, nrow=sims, ncol=1)

forest.pscore <- list()
forest.est <- matrix(0, nrow=sims, ncol=1)
forest.nSE <- matrix(0, nrow=sims, ncol=1)
forest.aiSE <- matrix(0, nrow=sims, ncol=1)

nnet.pscore <- list()
nnet.est <- matrix(0, nrow=sims, ncol=1)
nnet.nSE <- matrix(0, nrow=sims, ncol=1)
nnet.aiSE <- matrix(0, nrow=sims, ncol=1)

count <- 0
for (s in 1:sims)
  {
    cat("\n\ns:",s,"\n")
    count <- count+1

    sdta <- as.data.frame(dta[,s])

    #true model
#    lm1 <- lm(y.a~ z.a+ w1 + w2 + w3 + w4 + w8 + w9 + w10, data=sdta)
#    true.est[s] <- lm1$coef[2]
#    cat("truth:",mean(true.est[1:s]),"\n")

    #logit
    logit.ps <- glm(z.a ~ w1+w2+w3+w4+w5+w6+w7+w8+w9+w10, data=sdta, family=binomial(link="logit"))$fitted
    logit.pscore[[s]] <- logit.ps
    m1 <- Match(Y=sdta$y.a, Tr=sdta$z.a, X=logit.ps, estimand="ATT", M=1)
    logit.est[s] <-  m1$est
    logit.nSE[s] <- m1$se.standard
    logit.aiSE[s] <- m1$se

    cat("logit",s,":",m1$est,"\n")
    if(s>1)
      {
        cat("logit mean",s,":",mean(logit.est[1:s]),"\n")
        rmse <- sqrt(mean((logit.est[1:s]-g1)^2))
        cat("logit RMSE",s,":",rmse,"\n")
      }

    #boosting!
    ps.model <- ps(z.a ~ w1+w2+w3+w4+w5+w6+w7+w8+w9+w10,
                   data=sdta, title="none", stop.method=stop.methods["ks.stat.mean"],
                   plots="none", pdf.plots=F, n.trees=20000, interaction.depth=4,
                   shrinkage=0.0005, verbose=FALSE)

    boosting.pscore[[s]] <- ps.model$ps
    boosting.desc[[s]] <- ps.model$desc

    m1 <- Match(Y=sdta$y.a, Tr=sdta$z.a, X=ps.model$ps, estimand="ATT", M=1)
    boosting.est[s] <-  m1$est
    boosting.nSE[s] <- m1$se.standard
    boosting.aiSE[s] <- m1$se

    cat("\nboosting",s,":",m1$est,"\n")
    if(s>1)
      {
        cat("boosting mean",s,":",mean(boosting.est[1:s]),"\n")
        rmse <- sqrt(mean((boosting.est[1:s]-g1)^2))
        cat("boosting RMSE",s,":",rmse,"\n")
      }    


    #Random Forest
    ps1 <- 1
    while (max(ps1)> .999) {
#      gc(verbose=TRUE)
      fps.model <- randomForest(as.factor(z.a) ~ w1+w2+w3+w4+w5+w6+w7+w8+w9+w10, data=sdta)
      ps1 <- fps.model$votes[, 2]
    }
    
    forest.pscore[[s]] <- ps1

    m1 <- Match(Y=sdta$y.a, Tr=sdta$z.a, X=ps1, estimand="ATT", M=1)
    forest.est[s] <-  m1$est
    forest.nSE[s] <- m1$se.standard
    forest.aiSE[s] <- m1$se

    cat("\nforest",s,":",m1$est,"\n")
    if(s>1)
      {
        cat("forest mean",s,":",mean(forest.est[1:s]),"\n")
        rmse <- sqrt(mean((forest.est[1:s]-g1)^2))
        cat("forest RMSE",s,":",rmse,"\n")
      }

    n1 <- nnet(z.a ~ w1+w2+w3+w4+w5+w6+w7+w8+w9+w10, size=40, data=sdta, trace=FALSE)
    nnet.pscore[[s]] <- n1$fitted.values

    m1 <- Match(Y=sdta$y.a, Tr=sdta$z.a, X=n1$fitted.values, estimand="ATT", M=1)
    nnet.est[s] <-  m1$est
    nnet.nSE[s] <- m1$se.standard
    nnet.aiSE[s] <- m1$se

    cat("\nnnet",s,":",m1$est,"\n")
    if(s>1)
      {
        cat("nnet mean",s,":",mean(nnet.est[1:s]),"\n")
        rmse <- sqrt(mean((nnet.est[1:s]-g1)^2))
        cat("nnet RMSE",s,":",rmse,"\n")
      }    

    if (count>50)
      {
        save.image("RData.sims.G.nobs1000")
        count <- 0
      }
  }#end of sims loop


rm(sdta)
save.image("RData.sims.G.nobs1000",compress="bzip2")
